def __init__(self, base, override, maskid=-1, which=None, **kw):
    """Embedding that replaces base's vectors with override's where available.

    base/override: embedders with word dicts ``.D`` (word -> index) and equal
    output dims. which: optional iterable of words to restrict overriding to.
    """
    assert (base.outdim == override.outdim)
    # all base embedding vectors, gathered in index order
    baseindexes = Val(np.asarray(sorted(base.D.values()), dtype="int32"))
    basevar = base(baseindexes)
    if which is None:
        # base index -> override index; 0 when the word is unknown to override
        ad = {
            v: override.D[k] if k in override.D else 0
            for k, v in base.D.items()
        }
    else:
        # restrict overriding to the listed words only
        ad = {
            base.D[k]: override.D[k] if k in override.D else 0
            for k in which
        }
    # dense int32 translation table, indexed by base index
    valval = np.zeros((max(ad.keys()) + 1, ), dtype="int32")
    for i in range(valval.shape[0]):
        valval[i] = ad[i] if i in ad else 0
    overrideindexes = Val(valval)
    overridevar = override(overrideindexes)
    # the nonzero override index doubles as the switch mask, broadcast over dims
    # NOTE(review): a word legitimately mapped to override index 0 would be
    # treated as "not overridden" here -- confirm this is intended
    overridemask = np.repeat(valval[:, None], base.outdim, axis=1)
    v = Switch(overridevar, basevar, overridemask)
    super(NewOverriddenWordEmb, self).__init__(worddic=base.D, value=v(),
        dim=base.outdim, maskid=maskid)
def test_set_lr(self):
    # set_lr on the decoder applies 0.1 to its params; a later, deeper set_lr
    # on the attention sub-block overrides those to 0.5; encoder stays at 1.0.
    attdist = LinearDistance(110, 110, 100)
    encdec = SimpleSeqEncDecAtt(inpvocsize=19, outvocsize=17, outconcat=False,
        encdim=110, decdim=110, attdist=attdist)
    encdec.dec.set_lr(0.1)
    encdec.dec.attention.set_lr(0.5)  # TODO
    encdata = np.random.randint(0, 19, (2, 5))
    decdata = np.random.randint(0, 17, (2, 5))
    o = encdec(Val(encdata), Val(decdata))
    #print "\n".join(["{}: {}".format(x, x.lrmul) for x in o.allparams])
    #print "\n".join(["{}: {}".format(x, x.lrmul) for x in o.allparams])
    encparams = encdec.enc.get_params()
    decparams = encdec.dec.get_params()
    attparams = encdec.dec.attention.get_params()
    print "\n".join(["{}: {}".format(x, x.lrmul) for x in encparams]) + "\n"
    print "\n".join(["{}: {}".format(x, x.lrmul) for x in decparams]) + "\n"
    # encoder untouched: default multiplier
    for x in encparams:
        self.assertEqual(x.lrmul, 1.0)
    # decoder params got 0.1, except attention params set later to 0.5
    for x in decparams:
        if x not in attparams:
            self.assertEqual(x.lrmul, 0.1)
        else:
            self.assertEqual(x.lrmul, 0.5)
def test_reverse(self):
    """Reversing along one or two axes matches numpy negative-stride slicing."""
    data = np.random.randint(0, 5, (2, 3, 4))
    v = Val(data)
    # single-axis reverse
    out = v.reverse(1)
    expected = data[:, ::-1, :]
    self.assertTrue(np.allclose(expected, out.eval()))
    # two-axis reverse
    out = v.reverse(0, 1)
    expected = data[::-1, ::-1, :]
    self.assertTrue(np.allclose(expected, out.eval()))
def __init__(self, indim=1000, dim=50, value=None, normalize=False, trainfrac=1.0, **kw):
    """Vector embedding layer.

    value: optional pretrained matrix; when given and trainfrac == 0.0 it is
    frozen (wrapped as a Val), otherwise it becomes a trainable Parameter
    whose learning rate is scaled by trainfrac.
    """
    super(VectorEmbed, self).__init__(indim, dim, **kw)
    self.indim = indim
    self.dim = dim
    self.trainfrac = trainfrac
    if value is None:
        # fresh glorot-uniform initialized embedding matrix
        self.W = param((indim, dim), lrmul=self.trainfrac, name="embedder").glorotuniform()
    elif trainfrac == 0.0:
        # frozen pretrained values
        self.W = Val(value, name="embedder_val")
    else:
        # trainable pretrained values with scaled learning rate
        self.W = Parameter(value, lrmul=self.trainfrac, name="embedder")
    if normalize:
        self.W = self.W.normalize(axis=1)
    # sanity-check the stored matrix shape
    assert (self.W.d.get_value().shape == (self.indim, self.dim))
def test_max_pool_masked(self):
    """Masked global max-pool ignores positions past each sequence's length."""
    inpval = np.random.random((100, 20, 50)).astype("float32")
    seqlens = np.random.randint(1, 18, (100, ))
    maskval = np.ones((inpval.shape[:2]))
    for j in range(maskval.shape[0]):
        maskval[j, seqlens[j]:] = 0
    inp = Val(inpval)
    inp.mask = Val(maskval)
    pool = GlobalPool1D(mode="max")
    out = pool(inp)
    outval = out.eval()
    # push masked-out positions far below any real value before the reference max
    inpval = inpval - 1e9 * np.tensordot(1 - maskval, np.ones(
        (inpval.shape[-1], )), 0)
    expected = np.max(inpval, axis=1)
    self.assertTrue(np.allclose(outval, expected))
def test_set_lr(self):
    """set_lr applies the learning-rate multiplier to every param of a block."""
    layer = Linear(indim=10, dim=15)
    layer.set_lr(0.123)
    out = layer(Val(0))
    for p in out.allparams:
        self.assertEqual(p.lrmul, 0.123)
def test_total_pool(self):
    """Pooling over a full axis (size None) equals numpy max/sum on that axis."""
    data = np.random.random((10, 5))
    v = Val(data)
    pooled = MaxPool((None, ), axis=(1, ))(v)
    self.assertTrue(np.allclose(pooled.eval(), data.max(axis=1)))
    pooled = SumPool((None, ), axis=(1, ))(v)
    self.assertTrue(np.allclose(pooled.eval(), data.sum(axis=1)))
def test_compound_var(self): aval = np.zeros((10, 10)) bval = np.ones((10, 10)) maskval = np.repeat(np.asarray([[0, 1, 0, 0, 1, 0, 0, 1, 1, 1]]).T, 10, axis=1) print maskval print aval * maskval + bval * (1 - maskval) a = Val(aval) b = Val(bval) mask = Val(maskval) cv = Switch(a, b, mask) cvpred = cv().eval() print cvpred self.assertTrue( np.allclose(cvpred, aval * maskval + bval * (1 - maskval)))
def setvalue(self, v):
    """Replace the embedding matrix; raw arrays are wrapped per trainfrac,
    already-symbolic values (Var) are stored as-is. Also refreshes dims."""
    if isinstance(v, Var):
        self.W = v
    elif self.trainfrac == 0.0:
        # frozen: constant value, no gradient
        self.W = Val(v, name="embedder_val")
    else:
        self.W = Parameter(v, lrmul=self.trainfrac, name="embedder")
    # keep dimensions in sync with the new matrix
    self.indim, self.outdim = v.shape
def test_output_mask_strided(self): xval = np.random.random((100, 20, 50)).astype("float32") maskid = np.random.randint(3, 20, (100, )) mask = np.ones((xval.shape[:2])) for i in range(mask.shape[0]): mask[i, maskid[i]:] = 0 conv = Conv1D(indim=50, outdim=40, window=5, stride=4, border_mode="valid") x = Val(xval) x.mask = Val(mask) pred = conv(x) predmask = pred.mask print predmask.eval().shape print predmask.eval()[:5] print mask[:5]
def test_high_d_pool(self):
    """AvgPool output shapes for different pool sizes and axis orderings."""
    data = np.random.random((10, 6, 18, 14))
    v = Val(data)
    # (pool size, axes, expected output shape)
    cases = [
        ((2, 2), (2, 1), (10, 3, 9, 14)),
        ((2, 3), (1, 2), (10, 3, 6, 14)),
        ((2, 3), (2, 1), (10, 2, 9, 14)),
    ]
    for size, axes, expshape in cases:
        pooled = AvgPool(size, axis=axes)(v)
        self.assertEqual(pooled.eval().shape, expshape)
def test_enc_mask(self): xval = np.random.randint(1, 200, (100, 20)).astype("int32") maskid = np.random.randint(0, 5, (100, )) for i in range(xval.shape[0]): xval[i, maskid[i]:] = 0 x = Val(xval) enc = CNNSeqEncoder(indim=200, inpembdim=50, innerdim=5, maskid=0) pred = enc(x) #print pred.mask.eval().shape predval = pred.eval() print predval.shape
def test_output_shape_masked(self):
    """'half' Conv1D keeps the sequence length and zeroes outputs beyond
    each mask boundary plus half the window."""
    inpval = np.random.random((100, 20, 50)).astype("float32")
    seqlens = np.random.randint(3, 20, (100, ))
    maskval = np.ones((inpval.shape[:2]))
    for j in range(maskval.shape[0]):
        maskval[j, seqlens[j]:] = 0
    conv = Conv1D(indim=50, outdim=40, window=5)
    inp = Val(inpval)
    inp.mask = Val(maskval)
    out = conv(inp)
    outmask = out.mask
    outval = out.eval()
    # recover an effective mask from which outputs are exactly zero
    gotmask = (outval != 0.0) * 1
    expmask = np.ones_like(gotmask)
    for j in range(expmask.shape[0]):
        expmask[j, min(seqlens[j] + 2, expmask.shape[1]):, :] = 0
    self.assertTrue(np.sum(expmask - gotmask) == 0)
    self.assertEqual(outval.shape[:2], inpval.shape[:2])
    self.assertEqual(outval.shape[2], 40)
def __init__(self, encdim, invocsize, outvocsize, innerdim, seqlen, **kw):
    """Raw-theano idx-to-sequence model: builds embedding, recurrent and
    output-projection parameters.

    encdim: input embedding dim; invocsize/outvocsize: input/output vocab
    sizes; innerdim: recurrent state dim; seqlen: output sequence length.
    """
    super(idx2seqTheano, self).__init__(**kw)
    self.encdim = encdim
    self.invocsize = invocsize
    self.outvocsize = outvocsize
    self.innerdim = innerdim
    self.seqlen = seqlen
    # input word embeddings and a fixed one-hot table for output indices
    self.wordemb = param((invocsize, encdim)).uniform()
    self.idxtovec = Val(np.eye(outvocsize, outvocsize))
    # recurrent unit weights: input-to-hidden and hidden-to-hidden
    self.rnu_w = param((encdim + outvocsize, innerdim)).uniform()
    self.rnu_u = param((innerdim, innerdim)).uniform()
    self.outpf = theano.tensor.tanh
    # hidden-to-output projection
    self.olin = param((innerdim, outvocsize)).uniform()
    self.ownparams = [self.wordemb, self.rnu_u, self.rnu_w, self.olin]
def __init__(self, block=None, data=None, indim=200, outdim=50, **kw):
    """Memory block: wraps another Block applied to fixed stored data.

    block: required Block to wrap. data: a single value or sequence of
    values; raw (non-Var/Val) entries are wrapped as Vals, None entries
    are kept as placeholders.
    """
    assert(block is not None)
    ourdata = []
    if not issequence(data):
        data = [data]
    for datae in data:
        if not isinstance(datae, (Var, Val)) and datae is not None:
            ourdata.append(Val(datae))
        else:
            # already symbolic, or a None placeholder
            ourdata.append(datae)
    assert(isinstance(block, Block))
    self.data = ourdata
    super(MemoryBlock, self).__init__(indim, outdim, **kw)    # outdim = outdim of the contained block
    self.payload = block
    # if any entry is None, application is deferred and innervar stays None
    self.innervar = self.payload(*self.data) if None not in data else None  # innervar: (indim, outdim)
def get_init_info(
        self, inpseq, batsize, maskseq=None
):  # TODO: must evaluate enc here, in place, without any side effects
    """
    VERY DIFFERENT FROM THE PURELY SYMBOLIC GET_INIT_INFO IN REAL REC BLOCKS !!!
    This one is used in decoder/prediction
    """
    # numerically run the encoder (predict, not a symbolic application)
    enco, allenco, encmask = self.enc.predict(inpseq, mask=maskseq)
    if self.statetrans is not None:
        topstate = self.statetrans.predict(
            enco, allenco
        )  # this gives unused input warning in theano - it's normal
        initstates = [topstate]
    else:
        # no state transform: hand the batch size through instead of states
        # (presumably the decoder then builds fresh init states -- confirm)
        initstates = batsize
    # wrap the numeric encoder outputs back into Vals for the symbolic decoder
    return self.dec.get_init_info(
        Val(allenco),
        [Val(x) for x in initstates] if issequence(initstates) else initstates,
        encmask=Val(encmask))
def test_multilevel_set_lr(self): l1 = Linear(10, 11) l2 = Linear(11, 12) l3 = Linear(12, 13) s = stack(l1, l2, l3) s[1].set_lr(0.5) s[2].set_lr(0.1) o = s(Val(0)) l1o = s[0](Val(0)) l2o = s[1](Val(0)) l3o = s[2](Val(0)) print["{}: {}".format(x, x.lrmul) for x in o.allparams] for x in o.allparams: if x in l1o.allparams: self.assertEqual(x.lrmul, 1.0) elif x in l2o.allparams: self.assertEqual(x.lrmul, 0.5) elif x in l3o.allparams: self.assertEqual(x.lrmul, 0.1) s.set_lr(0.21) o = s(Val(0)) print["{}: {}".format(x, x.lrmul) for x in o.allparams] for x in o.allparams: self.assertEqual(x.lrmul, 0.21)
def test_bidir(self):
    """Bidirectional encoder: final state = forward half of last step
    concatenated with backward half of first step."""
    m = RNNSeqEncoder(indim=20, inpembdim=5, innerdim=(10, 10), bidir=True,
                      maskid=0).with_outputs()
    data = np.random.randint(1, 20, (7, 3))
    # pad each sequence with an equal-length all-zero (masked) tail
    data = np.concatenate([data, np.zeros_like(data)], axis=1)
    finalout, allout = m(Val(data))
    finalval = finalout.eval()
    allval = allout.eval()
    # forward half of the final state comes from the last timestep
    self.assertTrue(np.allclose(finalval[:, :10], allval[:, -1, :10]))
    # backward half of the final state comes from the first timestep
    self.assertTrue(np.allclose(finalval[:, 10:], allval[:, 0, 10:]))
    maskvar = allout.mask
    self.assertEqual(np.sum(maskvar.eval() - data > 0), 0)
def __init__(self, base, augment, **kw):
    """Word embedding augmented by a second embedder over (a subset of) the
    same vocabulary; builds a base-index -> augment-index translation table.
    """
    assert (base.outdim == augment.outdim)
    super(AugmentedWordEmb, self).__init__(worddic=base.D, value=False,
        dim=base.outdim,
        normalize=base.normalize,
        trainfrac=base.trainfrac,
        **kw)
    self.base = base
    self.augment = augment
    # base index -> augment index; 0 when the word is unknown to the
    # augmenting embedder (presumably a rare/dummy slot -- TODO confirm)
    self.ad = {
        v: augment.D[k] if k in augment.D else 0
        for k, v in base.D.items()
    }
    # dense int32 translation table indexed by base index
    valval = np.zeros((max(self.ad.keys()) + 1, ), dtype="int32")
    for i in range(valval.shape[0]):
        valval[i] = self.ad[i] if i in self.ad else 0
    self.adb = Val(valval)
def __init__(self, indim=50, outdim=50, window=5, border_mode="half",
             stride=1, filter_flip=True, **kw):
    """1-D convolution layer over sequences of feature vectors.

    indim/outdim: input/output feature dims; window: temporal filter width;
    border_mode/stride/filter_flip are passed through to the convolution.
    """
    super(Conv1D, self).__init__(**kw)
    # normalize border_mode: unwrap 1-tuples, pad a bare int to a 2-tuple
    if isinstance(border_mode, tuple):
        (border_mode, ) = border_mode
    if isinstance(border_mode, int):
        border_mode = (border_mode, 0)
    self.border_mode = border_mode
    self.stride = stride
    self.filter_flip = filter_flip
    # filters shaped for a 2-D convolution with a dummy trailing spatial dim
    self.filter_shape = (outdim, indim, window, 1)
    self.filter = param(self.filter_shape, name="conv_w").glorotuniform()
    # fixed all-ones filter, presumably used to propagate the input mask
    # through the same convolution -- confirm against the apply method
    self.maskfilter_shape = (1, 1, window, 1)
    self.maskfilter = Val(np.ones(self.maskfilter_shape, dtype="float32"))
def test_mask_propagation_all_states(self):
    # The mask computed by the embedder (maskid=0) should propagate unchanged
    # through the encoder when all per-step outputs are returned.
    m = SeqEncoder(VectorEmbed(maskid=0, indim=100, dim=7),
                   GRU(dim=7, innerdim=30)).all_outputs()\
        .maskoptions(MaskSetMode.ZERO)
    data = np.random.randint(1, 100, (5, 3), dtype="int32")
    # append an all-zero (fully masked) second half to every sequence
    ndata = np.zeros_like(data)
    data = np.concatenate([data, ndata], axis=1)
    dataval = Val(data)
    embvar = m.embedder(dataval)
    embpred = embvar.eval()
    embmaskpred = embvar.mask.eval()
    encvar = m(dataval)
    encpred = encvar.eval()
    encmaskpred = encvar.mask.eval()
    print encpred.shape
    print encmaskpred.shape
    print encmaskpred
    # encoder mask must equal the embedder mask elementwise
    self.assertTrue(np.sum(encmaskpred - embmaskpred) == 0)
def __init__(self, wordemb, wdic, **kw):
    """Adapt an existing word embedding to a new dictionary by translating
    new indices to the wrapped embedder's indices (rare token if unknown)."""
    innerdic = wordemb.D
    rare = wordemb.raretoken
    super(AdaptedWordEmb, self).__init__(worddic=wdic, value=False,
        dim=wordemb.outdim, normalize=wordemb.normalize,
        trainfrac=wordemb.trainfrac, raretoken=rare, **kw)
    self.inner = wordemb
    # new index -> inner index; unknown words fall back to the rare token
    self.ad = {
        idx: (innerdic[word] if word in innerdic else innerdic[rare])
        for word, idx in wdic.items()
    }
    # dense lookup table: position = new index, value = inner index
    table = np.zeros((max(self.ad.keys()) + 1, ), dtype="int32")
    for pos in range(table.shape[0]):
        if pos in self.ad:
            table[pos] = self.ad[pos]
    self.adb = Val(table)
def setvalue(self, v):
    """Install v as the embedding matrix: a frozen Val when trainfrac is 0,
    otherwise a trainable Parameter with lr scaled by trainfrac."""
    if self.trainfrac == 0.0:
        self.W = Val(v, name="embedder_val")
        return
    self.W = Parameter(v, lrmul=self.trainfrac, name="embedder")
def __init__(self, entmat):
    """Store the entity matrix as a constant graph input."""
    self.em = Val(entmat)  # entmat: idx[word]^(numents, len(ent.name))
def test_dimswap(self):
    """dimswap(1, 0) matches a numpy transpose of the first two axes."""
    data = np.random.randint(0, 5, (2, 3, 4))
    swapped = Val(data).dimswap(1, 0)
    expected = np.transpose(data, (1, 0, 2))
    self.assertTrue(np.allclose(expected, swapped.eval()))
# Scratch script exercising teafacto's symbolic wrapper API
# (variable construction, operator overloading, param tracking).
from teafacto.core.base import tensorops as T, Val, param
import numpy as np
import sys

x = Val(np.random.random((10, 10)))
#y = Val(np.random.random((10,10)))
y = param((10, 10), name="y").uniform()
w = param((10, 10), name="w").uniform()
#z = T.dot(x, y)
z = (x + y)
u = z * w
# the wrapped sigmoid op should be the same object on repeated access
s = T.nnet.sigmoid
s2 = T.nnet.sigmoid
print s == s2
sys.exit()
# NOTE(review): everything below is dead code -- sys.exit() above always runs
print z.allparams
print T.dot
print z.ndim
print z.dimswap
zd = z.dimswap(1, 0)
print z.dimswap(0, 1).allparams
print y.dimswap(0, 1).allparams
print T.nnet.conv.conv2d
print u.norm(2).allparams
print u.dimswap(0, 1).allparams
print T.nnet.softmax(z).allparams
zs = T.nnet.sigmoid(z)
zs = zs + x
zs.autobuild()
zs.autobuild()
def __init__(self, vocsize, **kw):
    """One-hot 'embedding': a fixed vocsize x vocsize identity matrix."""
    super(IdxToOneHot, self).__init__(vocsize, vocsize, **kw)
    self.W = Val(np.identity(vocsize))
def load(self, *data):
    """Wrap any raw inputs as Vals, then apply the inner block to them."""
    wrapped = []
    for d in data:
        if isinstance(d, (Var, Val)):
            wrapped.append(d)
        else:
            wrapped.append(Val(d))
    self.data = wrapped
    self.innervar = self.block(*self.data)
def __init__(self, wordmat):
    """Store the word matrix as a constant graph input."""
    # wordmat: presumably an integer index matrix like the entity-matrix
    # variant elsewhere in this file -- TODO confirm against callers
    self.em = Val(wordmat)
def __init__(self, entmat):
    """Store the entity matrix as a constant graph input."""
    # entmat: presumably idx[word]^(numents, len(ent.name)), matching the
    # documented variant of this constructor elsewhere in the file -- confirm
    self.em = Val(entmat)