def applymask(cls, xseq, maskseq=None):
    if maskseq is None:
        ret = xseq
    else:
        mask = T.tensordot(maskseq, T.ones((xseq.shape[2],)), 0)  # f32^(batsize, seqlen, outdim) -- maskseq stacked
        masker = T.concatenate(
            [T.ones((xseq.shape[0], xseq.shape[1], 1)),
             T.zeros((xseq.shape[0], xseq.shape[1], xseq.shape[2] - 1))],
            axis=2)  # f32^(batsize, seqlen, outdim) -- gives 100% prob to output 0
        ret = xseq * mask + masker * (1.0 - mask)
    return ret
def applymask(cls, xseq, maskseq):
    if maskseq is None:
        return xseq
    else:
        mask = T.tensordot(maskseq, T.ones((xseq.shape[2],)), 0)  # f32^(batsize, seqlen, outdim) -- maskseq stacked
        masker = T.concatenate(
            [T.ones((xseq.shape[0], xseq.shape[1], 1)),
             T.zeros((xseq.shape[0], xseq.shape[1], xseq.shape[2] - 1))],
            axis=2)  # f32^(batsize, seqlen, outdim) -- gives 100% prob to output 0
        ret = xseq * mask + masker * (1.0 - mask)
        return ret
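
# A minimal numpy sketch (illustrative, not part of the library) of what the two
# applymask variants above compute: masked time steps are replaced by a one-hot
# distribution that puts all probability on output symbol 0, so padding positions
# always "predict" the padding symbol.
import numpy as np

xseq = np.random.rand(2, 3, 4).astype("float32")             # (batsize, seqlen, outdim)
maskseq = np.array([[1, 1, 0], [1, 0, 0]], dtype="float32")  # (batsize, seqlen)

mask = np.tensordot(maskseq, np.ones((4,)), 0)               # stacked to (2, 3, 4)
masker = np.concatenate([np.ones((2, 3, 1)), np.zeros((2, 3, 3))], axis=2)
ret = xseq * mask + masker * (1.0 - mask)
assert np.allclose(ret[0, 2], [1, 0, 0, 0])                  # masked step -> one-hot on symbol 0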
def apply(self, x):
    overx = self.adb[x]
    mask = overx > 0  # an override index of 0 means "no override"
    mask = T.outer(mask, T.ones((self.outdim,)))
    # broadcast the boolean over the output dim and pick the override block's
    # output where an override is present, the base block's output elsewhere
    ret = T.switch(mask, self.override(overx), self.base(x))
    self._maskfrom(ret, x)
    return ret
def apply(self, x, mask=None):  # x: (batsize, seqlen, dim)
    mask = x.mask if mask is None else mask
    if mask is not None:
        assert mask.ndim == x.ndim - 1  # mask must be (batsize, seqlen)
        # broadcast the mask over the feature dimension and zero out masked positions
        realm = T.cast(T.tensordot(mask, T.ones((x.shape[-1],), dtype="int32"), 0), "float32")
        x = x * realm
    inp = x.dimshuffle(0, 2, 1, 'x')  # to (batsize, dim, seqlen, 1) for conv2d
    input_shape = None  # input.shape -- shapes unknown at graph-construction time
    convout = T.nnet.conv2d(inp, self.filter, input_shape, self.filter_shape,
                            border_mode=self.border_mode,
                            subsample=(self.stride, 1),
                            filter_flip=self.filter_flip)
    ret = convout[:, :, :, 0].dimshuffle(0, 2, 1)  # back to (batsize, seqlen', outdim)
    if mask is not None:
        # compute the new mask by convolving the old one
        print("conving the mask")
        mask_shape = None
        maskout = T.nnet.conv2d(T.cast(mask.dimshuffle(0, 'x', 1, 'x'), "float32"),
                                self.maskfilter, mask_shape, self.maskfilter_shape,
                                border_mode=self.border_mode,
                                subsample=(self.stride, 1),
                                filter_flip=self.filter_flip)
        mask = T.cast(maskout[:, 0, :, 0] > 0, "int32")
        ret.mask = mask
    return ret
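
# Illustrative numpy check of the mask convolution above (assumptions: maskfilter
# is an all-ones filter of width 3, stride 1, 'valid' border mode): convolving the
# 0/1 mask and thresholding > 0 marks every output position whose receptive field
# touched at least one unmasked input step.
import numpy as np

mask = np.array([1, 1, 1, 0, 0], dtype="float32")   # (seqlen,)
width = 3
maskout = np.array([mask[i:i + width].sum() for i in range(len(mask) - width + 1)])
newmask = (maskout > 0).astype("int32")             # -> [1, 1, 1]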
def get_init_info(self, context, initstates, ctx_0=None, encmask=None):
    initstates = self.block.get_init_info(initstates)
    ctx_0 = self._get_ctx_t(context, initstates, encmask) if ctx_0 is None else ctx_0
    if encmask is None:
        encmask = T.ones(context.shape[:2], dtype="float32")
    return [ctx_0, 0] + initstates, [encmask, context]
def apply_mask(cls, xseq, maskseq=None):
    if maskseq is None:
        ret = xseq
    else:
        mask = T.tensordot(maskseq, T.ones((xseq.shape[2],)), 0)  # f32^(batsize, seqlen, outdim) -- maskseq stacked
        ret = mask * xseq
    return ret
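
# numpy sketch (illustrative only): unlike applymask above, apply_mask simply
# zeroes masked time steps instead of substituting a one-hot distribution on
# symbol 0.
import numpy as np

xseq = np.ones((2, 3, 4), dtype="float32")
maskseq = np.array([[1, 1, 0], [1, 0, 0]], dtype="float32")
ret = np.tensordot(maskseq, np.ones((4,)), 0) * xseq
assert ret[0, 2].sum() == 0.0   # masked step is all zeros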
def apply(self, seq, weights=None):
    # seq: (batsize, seqlen, dim), weights: (batsize, seqlen)
    inp = seq.dimswap(1, 0)  # (seqlen, batsize, dim) -- scan iterates over the first axis
    if weights is None:
        w = T.ones((inp.shape[0], inp.shape[1]))  # (seqlen, batsize)
    else:
        self._weighted = True
        w = weights.dimswap(1, 0)
    outputs, _ = T.scan(fn=self.recwrap,
                        sequences=[inp, w],
                        outputs_info=[None] + self.block.get_init_info(seq.shape[0]),
                        go_backwards=self._reverse)
    return self._get_apply_outputs(outputs)
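
# Minimal plain-Theano sketch (imported as TT to avoid clashing with the wrapper's
# T used above) of the time-major scan pattern: scan steps over axis 0, so a
# (batsize, seqlen, dim) input is transposed to (seqlen, batsize, dim) first.
import theano
import theano.tensor as TT

seq = TT.tensor3("seq")                       # (batsize, seqlen, dim)
inp = seq.dimshuffle(1, 0, 2)                 # (seqlen, batsize, dim)
acc_0 = TT.zeros((seq.shape[0], seq.shape[2]))

def step(x_t, acc):                           # x_t: (batsize, dim)
    return acc + x_t                          # toy recurrence: running sum over time

outputs, _ = theano.scan(fn=step, sequences=inp, outputs_info=acc_0)
f = theano.function([seq], outputs[-1])       # final state equals the sum over time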
def apply(self, inpseq):  # inpseq: int^(batsize, seqlen)
    inpenco = self._inpencoder(inpseq)  # may carry a mask, based on the encoder's embedder
    batsize = inpenco.shape[0]
    outvocsize = self._memembmat.shape[0]
    # initial memory: logits slightly biased toward symbol 0, softmaxed below
    mem_0 = T.concatenate([
        T.ones((batsize, self._memlen, 1), dtype="float32") * 0.95,
        T.ones((batsize, self._memlen, outvocsize - 1), dtype="float32") * 0.05,
    ], axis=2)  # (batsize, outseqlen, outvocsize)
    mem_0 = T.softmax(mem_0)
    core_init_states = self._core.get_init_info(batsize)
    core_state_spec = self._core.get_statespec(flat=False)
    assert len(core_state_spec) == len(core_init_states)
    h_0 = None  # take the last output of the core states as the initial state
    c = 0
    for ss in core_state_spec:
        h_0_isout = False
        for sss in ss:
            if sss[0] == "output":
                h_0_isout = True
                h_0 = core_init_states[c]
        if not h_0_isout:
            h_0 = core_init_states[c]
        c += 1
    if self._inp_pos_repr is not None:
        # append position vectors to the encoded input
        inpposvecs = self._inp_pos_repr(inpseq.shape[1])
        inpposvecs = T.repeat(inpposvecs.dimadd(0), batsize, axis=0)
        inpenc = T.concatenate([inpenco, inpposvecs], axis=2)
        inpenc.mask = inpenco.mask
    else:
        inpenc = inpenco
    outputs = T.scan(fn=self.rec,
                     outputs_info=[None, mem_0, h_0] + core_init_states,
                     n_steps=self._nsteps,
                     non_sequences=inpenc)
    ret = outputs[0]
    ret.push_extra_outs({"mem_0": mem_0, "h_0": h_0})  # DEBUGGING
    return ret[-1], ret
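
# numpy sketch of the mem_0 initialization above (assuming T.softmax normalizes
# over the last axis): logits of 0.95 for symbol 0 and 0.05 elsewhere give a
# near-uniform initial soft memory with symbol 0 slightly favored.
import numpy as np

batsize, memlen, outvocsize = 2, 4, 5
logits = np.concatenate([np.full((batsize, memlen, 1), 0.95),
                         np.full((batsize, memlen, outvocsize - 1), 0.05)], axis=2)
e = np.exp(logits - logits.max(axis=2, keepdims=True))
mem_0 = e / e.sum(axis=2, keepdims=True)    # rows sum to 1, symbol 0 slightly favored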
def _get_ctx_t0(self, ctx, ctx_0=None):
    if ctx_0 is None:
        if ctx.d.ndim == 2:    # static context
            ctx_0 = ctx
        elif ctx.d.ndim > 2:   # dynamic context: (batsize, inseqlen, inencdim)
            assert self.attention is not None  # a 3D context is only processable with attention (dynamic context)
            # uniform initial attention weights over the input sequence
            w_0 = (T.ones((ctx.shape[0], ctx.shape[1]), dtype=T.config.floatX)
                   / ctx.shape[1].astype(T.config.floatX))
            ctx_0 = self.attention.attentionconsumer(ctx, w_0)
            # alternative (disabled): take the last context: ctx_0 = ctx[:, -1, :]
        else:
            raise Exception("unexpected context dimensionality in SeqDecoder._get_ctx_t0()")
    return ctx_0
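
# numpy sketch of the uniform initial attention above, assuming the attention
# consumer computes a weighted sum of context vectors: with weights 1/inseqlen,
# ctx_0 is simply the mean over the input positions.
import numpy as np

ctx = np.random.rand(2, 5, 8).astype("float32")   # (batsize, inseqlen, inencdim)
w_0 = np.ones((2, 5), dtype="float32") / 5.0
ctx_0 = np.einsum("bs,bsd->bd", w_0, ctx)
assert np.allclose(ctx_0, ctx.mean(axis=1), atol=1e-6)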
import numpy as np

def apply(self, x, mask=None):  # x: (batsize, seqlen, dim)
    mask = x.mask if mask is None else mask
    if mask is not None:
        assert mask.ndim == x.ndim - 1
        realm = T.tensordot(mask, T.ones((x.shape[-1],)), 0)  # broadcast mask over the feature dim
        if self.mode == "max":
            # push masked positions to -inf so they can never win the max
            x = T.switch(realm, x, np.infty * (realm - 1))
        else:
            x = x * realm
    if self.mode == "max":
        ret = T.max(x, axis=-2)
    elif self.mode == "sum":
        ret = T.sum(x, axis=-2)
    elif self.mode == "avg":
        ret = T.sum(x, axis=-2) / x.shape[-2]
    else:
        raise Exception("unknown pooling mode: {:3s}".format(self.mode))
    # ret: (batsize, dim)
    if mask is not None:
        mask = 1 * (T.sum(mask, axis=-1) > 0)  # an example survives if any step was unmasked
        ret = T.switch(T.tensordot(mask, T.ones((x.shape[-1],)), 0), ret, T.zeros_like(ret))
        ret.mask = mask
    return ret
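
# numpy sketch of the masked max-pooling branch above: masked steps are sent to
# -inf so they cannot win the max.
import numpy as np

x = np.array([[[1., 5.], [9., 2.], [7., 7.]]])   # (batsize=1, seqlen=3, dim=2)
mask = np.array([[1, 1, 0]])                     # last step is padding
realm = np.tensordot(mask, np.ones(2), 0)
xm = np.where(realm, x, -np.inf)
assert np.allclose(xm.max(axis=-2), [[9., 5.]])  # the masked step's 7s are ignored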
def get_inits(self, initstates=None, batsize=None, ctx=None, ctxmask=None):
    if initstates is None:
        initstates = batsize
    elif issequence(initstates):
        if len(initstates) < self.numstates:
            # fill up with batsizes for the lower layers
            initstates = [batsize] * (self.numstates - len(initstates)) + initstates
    ctxmask = ctx.mask if ctxmask is None else ctxmask
    ctxmask = T.ones(ctx.shape[:2], dtype="float32") if ctxmask is None else ctxmask
    nonseqs = [ctxmask, ctx]
    return self.get_init_info(initstates), nonseqs
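
# Plain-Python sketch of the fill-up rule above: when fewer init states are given
# than there are layers, the lower layers receive batch sizes so they can build
# their own default initial states.
numstates = 3
initstates = ["top_state"]      # only the topmost layer's state is provided
batsize = 16
filled = [batsize] * (numstates - len(initstates)) + initstates
assert filled == [16, 16, "top_state"]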
def _get_apply_outputs(self, final, outputs, states, mask):
    ret = []
    if "enc" in self._return:  # final states of the topmost layer
        ret.append(final)
    if "all" in self._return:  # states (over all time steps) of the topmost layer
        rete = outputs  # (batsize, seqlen, dim) --> zero out according to the mask
        if self._maskconfig.maskset == MaskSetMode.ZERO and mask is not None:
            fmask = T.tensordot(mask, T.ones((outputs.shape[2],)), 0)
            rete = rete * fmask
        ret.append(rete)
    if "states" in self._return:  # final states (over all layers)???
        pass  # TODO: do we need to support this?
    if "mask" in self._return:
        ret.append(mask)
    if len(ret) == 1:
        return ret[0]
    else:
        return ret
def apply(self, x, mask=None):
    if self.enc.embedder is None:
        mindim = 3
        maskdim = x.ndim - 1
    else:
        mindim = 2
        maskdim = x.ndim
    if mask is not None:
        assert mask.ndim == maskdim
    else:
        mask = T.ones(x.shape[:maskdim])
    if x.ndim == mindim:
        return self.enc(x, mask=mask)
    elif x.ndim > mindim:
        ret = T.scan(fn=self.outerrec, sequences=[x, mask], outputs_info=None)
        return ret
    else:
        raise Exception("cannot have less than {} dims".format(mindim))